Data Visualization

library(tidyverse)
## ── Attaching packages ──────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Plotting with ggplot2

surveys_complete <- read_csv("data/portal_data_joined.csv")
## Parsed with column specification:
## cols(
##   record_id = col_double(),
##   month = col_double(),
##   day = col_double(),
##   year = col_double(),
##   plot_id = col_double(),
##   species_id = col_character(),
##   sex = col_character(),
##   hindfoot_length = col_double(),
##   weight = col_double(),
##   genus = col_character(),
##   species = col_character(),
##   taxa = col_character(),
##   plot_type = col_character()
## )
ggplot(data = surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point()
## Warning: Removed 4048 rows containing missing values (geom_point).

surveys_plot <- ggplot(data = surveys_complete, 
                       mapping = aes(x = weight, y = hindfoot_length))
surveys_plot+
  geom_point()
## Warning: Removed 4048 rows containing missing values (geom_point).

Challenge

library("hexbin")
surveys_plot +
 geom_hex()
## Warning: Removed 4048 rows containing non-finite values (stat_binhex).

A hexagonal is more useful when there is a high density of points in an area compared to a scatter plot. On the other hand if there are less points, a scatterplot is more precise.

ggplot(data = surveys_complete, aes(x = weight, y = hindfoot_length)) +
    geom_point(alpha = 0.1)
## Warning: Removed 4048 rows containing missing values (geom_point).

ggplot(data = surveys_complete, aes(x = weight, y = hindfoot_length)) +
    geom_point(alpha = 0.1, color = "blue")
## Warning: Removed 4048 rows containing missing values (geom_point).

ggplot(data = surveys_complete, aes(x = weight, y = hindfoot_length)) +
    geom_point(alpha = 0.1, aes(color = species_id))
## Warning: Removed 4048 rows containing missing values (geom_point).

str(surveys_complete)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 34786 obs. of  13 variables:
##  $ record_id      : num  1 72 224 266 349 363 435 506 588 661 ...
##  $ month          : num  7 8 9 10 11 11 12 1 2 3 ...
##  $ day            : num  16 19 13 16 12 12 10 8 18 11 ...
##  $ year           : num  1977 1977 1977 1977 1977 ...
##  $ plot_id        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ species_id     : chr  "NL" "NL" "NL" "NL" ...
##  $ sex            : chr  "M" "M" NA NA ...
##  $ hindfoot_length: num  32 31 NA NA NA NA NA NA NA NA ...
##  $ weight         : num  NA NA NA NA NA NA NA NA 218 NA ...
##  $ genus          : chr  "Neotoma" "Neotoma" "Neotoma" "Neotoma" ...
##  $ species        : chr  "albigula" "albigula" "albigula" "albigula" ...
##  $ taxa           : chr  "Rodent" "Rodent" "Rodent" "Rodent" ...
##  $ plot_type      : chr  "Control" "Control" "Control" "Control" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   record_id = col_double(),
##   ..   month = col_double(),
##   ..   day = col_double(),
##   ..   year = col_double(),
##   ..   plot_id = col_double(),
##   ..   species_id = col_character(),
##   ..   sex = col_character(),
##   ..   hindfoot_length = col_double(),
##   ..   weight = col_double(),
##   ..   genus = col_character(),
##   ..   species = col_character(),
##   ..   taxa = col_character(),
##   ..   plot_type = col_character()
##   .. )

Challenge

ggplot(data = surveys_complete, aes(x = weight, y = hindfoot_length)) +
    geom_point(alpha = 0.1, aes(color = plot_type))
## Warning: Removed 4048 rows containing missing values (geom_point).

This is not a good way to show this type of data because the different colors are too mixed together and small to contribute any information to the graph.

Boxplot

ggplot(data = surveys_complete, mapping = aes(x = species_id, y = weight)) +
    geom_boxplot()
## Warning: Removed 2503 rows containing non-finite values (stat_boxplot).

ggplot(data = surveys_complete, mapping = aes(x = species_id, y = weight)) +
    geom_boxplot(alpha = 0) +
    geom_jitter(alpha = 0.3, color = "tomato")
## Warning: Removed 2503 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2503 rows containing missing values (geom_point).

ggplot(data = surveys_complete, mapping = aes(x = species_id, y = weight)) +
  geom_jitter(alpha = 0.3, color = "tomato")+
  geom_boxplot(alpha = 0)
## Warning: Removed 2503 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2503 rows containing missing values (geom_point).

Challenge

ggplot(data = surveys_complete, mapping = aes(x = species_id, y = weight)) +
  geom_jitter(alpha = 0.3, color = "tomato")+
  geom_violin()
## Warning: Removed 2503 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2503 rows containing missing values (geom_point).

ggplot(data = surveys_complete, mapping = aes(x = species_id, y = weight)) +
  geom_jitter(alpha = 0.3, color = "tomato")+
  geom_violin()+
  scale_y_log10()
## Warning: Removed 2503 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2503 rows containing missing values (geom_point).

ggplot(data = surveys_complete, mapping = aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(alpha = 0.3, aes(color = plot_id)) +
  geom_boxplot(alpha = 0)
## Warning: Removed 3348 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3348 rows containing missing values (geom_point).

Plotting time series data

library(tidyverse)
surveys_complete <- read_csv("data/portal_data_joined.csv")
## Parsed with column specification:
## cols(
##   record_id = col_double(),
##   month = col_double(),
##   day = col_double(),
##   year = col_double(),
##   plot_id = col_double(),
##   species_id = col_character(),
##   sex = col_character(),
##   hindfoot_length = col_double(),
##   weight = col_double(),
##   genus = col_character(),
##   species = col_character(),
##   taxa = col_character(),
##   plot_type = col_character()
## )
yearly_counts <- surveys_complete %>%
  count(year, genus)
ggplot(data = yearly_counts, aes(x = year, y = n)) +
     geom_line()

ggplot(data = yearly_counts, aes(x = year, y = n, group = genus)) +
    geom_line()

ggplot(data = yearly_counts, aes(x = year, y = n, color = genus)) +
    geom_line()

Integrating the pipe operator with ggplot2

yearly_counts %>% 
    ggplot(mapping = aes(x = year, y = n, color = genus)) +
    geom_line()

yearly_counts_graph <- surveys_complete %>%
    count(year, genus) %>% 
    ggplot(mapping = aes(x = year, y = n, color = genus)) +
    geom_line()

yearly_counts_graph

Faceting

ggplot(data = yearly_counts, aes(x = year, y = n)) +
    geom_line() +
    facet_wrap(facets = vars(genus), ncol = 3)
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

yearly_sex_counts <- surveys_complete %>%
                      count(year, genus, sex)
ggplot(data = yearly_sex_counts, mapping = aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(facets =  vars(genus), ncol=3)
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

ggplot(data = yearly_sex_counts, 
       mapping = aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(rows = vars(sex), cols =  vars(genus))
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

ggplot(data = yearly_sex_counts, 
       mapping = aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(rows = vars(genus))
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

ggplot(data = yearly_sex_counts, 
       mapping = aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(cols = vars(genus))
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

ggplot2 themes

ggplot(data = yearly_sex_counts, 
        mapping = aes(x = year, y = n, color = sex)) +
     geom_line() +
     facet_wrap(vars(genus), ncol=3) +
     theme_bw()
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

Challenge

yearly_weight <- surveys_complete %>%
                group_by(year, species_id) %>%
                 summarize(avg_weight = mean(weight))
ggplot(data = yearly_weight, aes(x = year, y = avg_weight)) +
    geom_line() +
    facet_wrap(facets = vars(species_id), ncol=3)
## Warning: Removed 89 rows containing missing values (geom_path).
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

Customization

ggplot(data = yearly_sex_counts, aes(x = year, y = n, color = sex)) +
    geom_line() +
    facet_wrap(vars(genus), ncol=3) +
    labs(title = "Observed genera through time",
         x = "Year of observation",
         y = "Number of individuals") +
    theme_bw()
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

ggplot(data = yearly_sex_counts, mapping = aes(x = year, y = n, color = sex)) +
    geom_line() +
    facet_wrap(vars(genus), ncol=3) +
    labs(title = "Observed genera through time",
        x = "Year of observation",
        y = "Number of individuals") +
    theme_bw() +
    theme(text=element_text(size = 16))
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

ggplot(data = yearly_sex_counts, mapping = aes(x = year, y = n, color = sex)) +
    geom_line() +
    facet_wrap(vars(genus), ncol=3) +
    labs(title = "Observed genera through time",
        x = "Year of observation",
        y = "Number of individuals") +
    theme_bw() +
    theme(axis.text.x = element_text(colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5),
                        axis.text.y = element_text(colour = "grey20", size = 12),
                        strip.text = element_text(face = "italic"),
                        text = element_text(size = 16))
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

grey_theme <- theme(axis.text.x = element_text(colour="grey20", size = 12, 
                                               angle = 90, hjust = 0.5, 
                                               vjust = 0.5),
                    axis.text.y = element_text(colour = "grey20", size = 12),
                    text=element_text(size = 16))

ggplot(surveys_complete, aes(x = species_id, y = hindfoot_length)) +
    geom_boxplot() +
    grey_theme
## Warning: Removed 3348 rows containing non-finite values (stat_boxplot).

Challenge

yearly_counts %>% 
  ggplot(mapping = aes(x = year, y = n)) +
  facet_wrap(vars(genus), ncol=3)+
  geom_line()+
  labs(title = "Observed genera through time",
       x = "Year of observation",
       y = "number of individuals")+
  theme_bw()+
  theme(text=element_text(size = 16))
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?